#!/usr/bin/env python 
# coding:utf-8
# @Time :9/10/18 18:27

from common.constant import SpiderStatus


# Scheduling configuration: storage table, target host, record selection
# filters, index fields and the message-queue tube name.
SCHEDULE_CONF = {
    # Table that stores the scheduling information.
    "schedule_table_name": "wenshu_scheduler_list",

    "host": "wenshu.court.gov.cn",

    # Province that currently needs to be scheduled (disabled).
    # "province": "gsxt",

    # Selection conditions for list pages.
    "list_select_list": [
        # Prefer records whose list page has not been crawled yet.
        {"search_status": SpiderStatus.UNSEARCHED},
        # NOTE: "SEARCH_FAILDED" is the attribute name as referenced on
        # SpiderStatus; do not "correct" the spelling here.
        {"search_status": SpiderStatus.SEARCH_FAILDED},
        # {"search_status": SEARCH_LIST_SUCCESS, "href": None},
    ],

    # Selection conditions for detail pages (disabled).
    # "detail_select_list": [
    #     {"crawl_status": None, "href": {"$ne": None}},
    #     {"crawl_status": CRAWL_DETAIL_FAIL, "href": {"$ne": None}},
    #     {"crawl_status": CRAWL_DETAIL_PART_SUCCESS, "href": {"$ne": None}},
    # ],

    # Index fields. Optional; must be filled in if the table does not
    # already have indexes built.
    "index": ["search_status", "crawl_status", "href"],

    # Message queue (tube name).
    "tube": "offline_extract_info",
}